1 Set-up

if(!dir.exists("out")) {
    dir.create("out", showWarnings = FALSE, recursive = TRUE)
}

1.1 libs

suppressPackageStartupMessages({
    library(tidyverse)
    library(lubridate)
    library(broom)
    library(psych)
    library(heatmaply)
    library(patchwork)
    library(ComplexHeatmap)
})
nest <- nest_legacy
unnest <- unnest_legacy

1.2 Data Read-In

1.2.1 tRNA array probes

tRNAprobes <- 
read_delim( 
    delim = "\t",
    file = "../tRNA-GtRNAdb/450k_coresponding_hg19tRNAs.bed",
    col_types = "ciicciiciciicccc",
    col_names = c("pChr","pStart","pEnd","probeID",
        as.character(
            read_delim(
                delim = "\t",
                file = "../tRNA-GtRNAdb/std_tRNA_header.txt",
                col_names = FALSE,
                col_types = paste0(rep("c", 12), collapse = "")
            )
        )
    )
)

1.2.2 tRNAge gene set

tRNAge <- read_delim(
    delim = "\t",
     col_names = "tRNAname",
     col_types = "c",
     file = "../tRNA-GtRNAdb/GenWideSigWintRNAs.txt"
) %>%
    pull()

tRNAgeBB <- read_delim(
    delim = "\t",
     col_names = "tRNAname",
     col_types = "c",
     file = "../tRNA-GtRNAdb/BB_GWS_tRNA.txt"
) %>%
    pull()
swsBB23 <- read_tsv(
    "../tRNA-GtRNAdb/swsBB23.tsv",
    col_names = "tRNAname", col_types = "c"
) %>% pull()

gwsBB6 <- read_tsv(
    "../tRNA-GtRNAdb/BB_GWS_tRNA.txt",
    col_names = "tRNAname", col_types = "c"
) %>% pull()

bltRNAs <- read_tsv(
    "../tRNA-GtRNAdb/tRNAs-in-hg19-blacklist-v2.txt",
    col_names = "tRNAname", col_types = "c"
) %>% pull()

swsBB23bl <- swsBB23[!swsBB23 %in% bltRNAs]
gwsBB6bl <- gwsBB6[!gwsBB6 %in% bltRNAs]

1.2.3 Methylation Data

dataTest <- readRDS(
    file = "data/tRNAprobesNormCancerArrayPairs.Rds"
)
tRNAmethCancerNormal <- dataTest %>% 
    unnest() %>% 
    as_tibble() %>%
    filter(sample_type %in% c("Solid Tissue Normal", "Primary Tumor")) %>%
    mutate(tRNAge = tRNAname %in% swsBB23bl) %>%
    filter(!is.na(Beta_value))

samples used

tRNAmethCancerNormal %>% 
    distinct(file_id, file_name, sample_id, case_id, sample_type, primary_site, age) %>% 
    arrange(sample_type, primary_site, age) %>%
    write_tsv("out/TCGA_samples_used.tsv")
tRNAmethCancerNormal %>% distinct(case_id) %>% nrow()
[1] 733
tRNAsCovered <- 
tRNAmethCancerNormal %>% distinct(tRNAname)
tRNAsCovered
tRNAsCovered %>%
    filter(tRNAname %in% gwsBB6bl)

tRNAsCovered %>%
    filter(tRNAname %in% swsBB23bl)

2 variance

Levene <- 
onewaytests::homog.test(data = tRNAmethCancerNormal, Beta_value ~ sample_type, method = "Levene")

  Levene's Homogeneity Test (alpha = 0.05) 
----------------------------------------------- 
  data : Beta_value and sample_type 

  statistic  : 2782.235 
  num df     : 1 
  denum df   : 73405 
  p.value    : 0 

  Result     : Variances are not homogeneous. 
----------------------------------------------- 
#Levene %>% str()

Brown_forsythe <- 
onewaytests::bf.test(data = tRNAmethCancerNormal, Beta_value ~ sample_type)

  Brown-Forsythe Test (alpha = 0.05) 
------------------------------------------------------------- 
  data : Beta_value and sample_type 

  statistic  : 794.4947 
  num df     : 1 
  denom df   : 68163.63 
  p.value    : 8.495945e-174 

  Result     : Difference is statistically significant. 
------------------------------------------------------------- 
#Brown_forsythe %>% str()

3 By tRNA age modeling

3.1 Normal

normalAgeModelsBytRNA <- tRNAmethCancerNormal %>%
    dplyr::filter(sample_type == "Solid Tissue Normal") %>%
    group_by(tRNAname) %>%
    #group_by(tRNAname,primary_site) %>%
    nest() %>%
    mutate(model = purrr::map(data, ~ lm(age ~ Beta_value, data = .)))
#bonfer <- 0.05 / tRNAmethCancerNormal %>% dplyr::select(probeID) %>% distinct() %>% nrow()
bonfer <- 0.05 / normalAgeModelsBytRNA %>% nrow()
bonfer
[1] 0.001111111
normalAgeModelsBytRNAG <- normalAgeModelsBytRNA %>%
    unnest(model %>% purrr::map(glance)) %>% 
    arrange(p.value)
normalAgeModelsBytRNAG %>%
    dplyr::select(-data,-model)
normalAgeModelsBytRNAGsig <- normalAgeModelsBytRNAG %>%
    dplyr::select(-data, -model) %>%
    dplyr::filter(p.value < bonfer) %>% 
    arrange(p.value)

normalAgeModelsBytRNAGsig
normalAgeModelsBytRNA %>%
    unnest(model %>% purrr::map(tidy)) %>% 
    filter(term == "Beta_value", p.value < bonfer)
normalAgeModelsBytRNAGsig %>% distinct(tRNAname)
normalAgeModelsBytRNAGsig %>% 
    dplyr::filter(tRNAname %in% gwsBB6bl)

normalAgeModelsBytRNAGsig %>% 
    dplyr::filter(tRNAname %in% swsBB23bl)
# plots <- 
# tRNAmethCancerNormal %>%
#   group_by(aa) %>%
#   do(plot=
#       ggplot(aes())
#   )

3.1.1 Clustering

normalAgeModelsBytRNAG %>% select(-data, -model)

3.2 Cancer

cancerAgeModelsBytRNA <- tRNAmethCancerNormal %>%
    dplyr::filter(sample_type == "Primary Tumor") %>%
    group_by(tRNAname) %>%
    #group_by(tRNAname,primary_site) %>%
    nest() %>%
    mutate(model = purrr::map(data, ~ lm(age ~ Beta_value, data = .)))
cancerAgeModelsBytRNAG <- cancerAgeModelsBytRNA %>%
    unnest(model %>% purrr::map(glance)) %>% 
    arrange(p.value)
cancerAgeModelsBytRNAG %>%
    dplyr::select(-data, -model)
cancerAgeModelsBytRNAGsig <- cancerAgeModelsBytRNAG %>%
    dplyr::select(-data, -model) %>%
    dplyr::filter(p.value < bonfer) %>% 
    arrange(p.value)
cancerAgeModelsBytRNAGsig
cancerAgeModelsBytRNAGsig %>% distinct(tRNAname)
cancerAgeModelsBytRNAGsig %>% 
    dplyr::filter(tRNAname %in% swsBB23bl)

4 By tRNA and Tissue Age modeling

tRNAmethCancerNormal %>%
    dplyr::filter(sample_type == "Solid Tissue Normal") %>%
    group_by(tRNAname, primary_site) %>%
    summarise(n = n()) %>%
    spread(primary_site, n) %>%
    column_to_rownames("tRNAname") %>%
    data.matrix() %>% 
    #heatmaply()
    Heatmap(
        row_names_gp = gpar(fontsize = 8),
        na_col = "black",
        heatmap_width = unit(5.5, "inches"),
        heatmap_height = unit(7.5, "inches")#,
    )

ggplotly(dynamicTicks = TRUE,
tRNAmethCancerNormal %>%
    dplyr::filter(sample_type == "Solid Tissue Normal") %>%
    group_by(tRNAname, primary_site) %>%
    summarise(n = n()) %>%
    ggplot(aes(n)) + 
        geom_density(aes(colour = primary_site))
)
normalAgeModelsBytRNABySite <- tRNAmethCancerNormal %>%
    dplyr::filter(sample_type == "Solid Tissue Normal") %>%
    group_by(tRNAname, primary_site) %>%
    nest() %>%
    mutate(model = purrr::map(data, ~ lm(age ~ Beta_value, data = .)))
normalAgeModelsBytRNABySiteG <- normalAgeModelsBytRNABySite %>%
    unnest(model %>% purrr::map(glance)) %>% 
    arrange(p.value)
essentially perfect fit: summary may be unreliable
#normalAgeModelsBytRNABySiteG
bonfertRNATissue <- 0.05 / normalAgeModelsBytRNABySiteG %>% nrow()
bonfertRNATissue
[1] 5.868545e-05
normalAgeModelsBytRNABySiteGsig <- normalAgeModelsBytRNABySiteG %>%
    dplyr::filter(p.value < bonfertRNATissue) %>%
    dplyr::select(-data, -model)
normalAgeModelsBytRNABySiteGsig
normalAgeModelsBytRNABySiteGsig %>% 
    dplyr::filter(tRNAname %in% swsBB23bl)
normalAgeModelsBytRNABySiteG %>% 
    dplyr::select(-data, -model) %>%
    dplyr::filter(tRNAname %in% swsBB23bl) %>%
    dplyr::filter(p.value < bonfer) ###!!! not correcting for all tests - use bonfertRNATissue

4.0.1 Clustering

pvalueByTissueAndtRNA <- 
normalAgeModelsBytRNABySiteG %>%
    ungroup() %>%
    dplyr::select(-data, -model) %>%
    select(tRNAname, p.value, primary_site) %>%
    spread(primary_site, p.value)
    #tRNAname,primary_site,p.value
    #spread(tRNAname,p.value)

lowCoverageTissues <- 
normalAgeModelsBytRNABySiteG %>%
    ungroup() %>%
    dplyr::select(-data, -model) %>%
    select(tRNAname, p.value, primary_site) %>%
    group_by(primary_site) %>%
    summarise(Nna = length(which(is.na(p.value) | is.nan(p.value))), n = n(), p = Nna / n) %>%
    filter(p > 0.8) %>%
    mutate(primary_site = as.character(primary_site)) %>%
    pull(primary_site)

pvalueByTissueAndtRNAm <- 
pvalueByTissueAndtRNA %>%
    select(-tRNAname) %>%
    select(-lowCoverageTissues) %>%
    data.matrix()
    
rownames(pvalueByTissueAndtRNAm) <- pvalueByTissueAndtRNA$tRNAname

pvalueByTissueAndtRNAm %>% dim()
[1] 45 16
pvalueByTissueAndtRNAm %>% heatmaply::heatmaply(
    main = "Change in DNAm with Age by tissue (p-value)"
)
'heatmap' objects don't have these attributes: 'showlegend'
Valid attributes include:
'type', 'visible', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'z', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'hovertext', 'transpose', 'xtype', 'ytype', 'zsmooth', 'connectgaps', 'xgap', 'ygap', 'zhoverformat', 'hovertemplate', 'zauto', 'zmin', 'zmax', 'zmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
'heatmap' objects don't have these attributes: 'showlegend'
Valid attributes include:
'type', 'visible', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'z', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'hovertext', 'transpose', 'xtype', 'ytype', 'zsmooth', 'connectgaps', 'xgap', 'ygap', 'zhoverformat', 'hovertemplate', 'zauto', 'zmin', 'zmax', 'zmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

pvalueByTissueAndtRNAm %>%
Heatmap(
    na_col = "black",
    heatmap_width = unit(5.5, "inches"),
    heatmap_height = unit(7.5, "inches"),
    column_title_gp = gpar(fontsize = 16, fontface = "bold"),
    column_title = "Change in DNAm with Age\nby tissue (p-value)",
    name = "p-value\n",
    column_names_rot = 45,
    column_names_gp = gpar(fontsize = 10),
    row_names_gp = gpar(
        fontsize = 10,
        col = if_else(rownames(.) %in% tRNAgeBB, "red", "black")
    ),
    row_title = "tRNA gene"
)

#pvalueByTissueAndtRNAm %>%

tmp <- apply(data.matrix(pvalueByTissueAndtRNAm < 0.05), 2, as.character)
rownames(tmp) <- rownames(pvalueByTissueAndtRNAm)
tmp %>%
Heatmap(
    col = structure(c("red", "blue"), names = c("TRUE", "FALSE")),
    na_col = "black",
    heatmap_width = unit(5.5, "inches"),
    heatmap_height = unit(7.5, "inches"),
    column_title_gp = gpar(fontsize = 16, fontface = "bold"),
    column_title = "Change in DNAm with Age\nby tissue (p-value)",
    name = "p-value\n",
    column_names_rot = 45,
    column_names_gp = gpar(fontsize = 10),
    row_split = rownames(.) %>%
        gsub(pattern = "tRNA-(\\w+)-\\w+-\\w+-\\d+", replacement = "\\1"),
    row_names_gp = gpar(
        fontsize = 10,
        col = if_else(rownames(.) %in% tRNAgeBB, "red", "black")
    ),
    row_title = "tRNA gene"
)
normalAgeModelsBytRNABySiteGl <- 
normalAgeModelsBytRNABySiteG %>% 
    select(-data) %>% 
    unnest(model %>% map(broom::tidy)) %>% 
    select(-std.error, -statistic1, -p.value1) %>% 
    spread(term,estimate)
essentially perfect fit: summary may be unreliable
slopeByTissueAndtRNA <- 
normalAgeModelsBytRNABySiteGl %>%
    ungroup() %>%
    #dplyr::select(-data,-model) %>%
    select(tRNAname, Beta_value, primary_site) %>%
    #mutate(Beta_value = log(Beta_value)) %>%
    spread(primary_site, Beta_value)
    #tRNAname,primary_site,p.value
    #spread(tRNAname,p.value)

lowCoverageTissues <- 
normalAgeModelsBytRNABySiteGl %>%
    ungroup() %>%
    #dplyr::select(-data,-model) %>%
    select(tRNAname, Beta_value, primary_site) %>%
    group_by(primary_site) %>%
    summarise(Nna = length(which(is.na(Beta_value) | is.nan(Beta_value))), n = n(), p = Nna / n) %>%
    filter(p > 0.5) %>%
    mutate(primary_site = as.character(primary_site)) %>%
    pull(primary_site)

slopeByTissueAndtRNAm <- 
slopeByTissueAndtRNA %>%
    select(-tRNAname) %>%
    select(-lowCoverageTissues) %>%
    data.matrix()
    
rownames(slopeByTissueAndtRNAm) <- slopeByTissueAndtRNA$tRNAname

slopeByTissueAndtRNAm %>% dim()
[1] 45 18
slopeByTissueAndtRNAm %>% heatmaply::heatmaply(
    main = "Change in DNAm with Age by tissue (slope)"
)
'heatmap' objects don't have these attributes: 'showlegend'
Valid attributes include:
'type', 'visible', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'z', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'hovertext', 'transpose', 'xtype', 'ytype', 'zsmooth', 'connectgaps', 'xgap', 'ygap', 'zhoverformat', 'hovertemplate', 'zauto', 'zmin', 'zmax', 'zmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
'heatmap' objects don't have these attributes: 'showlegend'
Valid attributes include:
'type', 'visible', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'z', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'hovertext', 'transpose', 'xtype', 'ytype', 'zsmooth', 'connectgaps', 'xgap', 'ygap', 'zhoverformat', 'hovertemplate', 'zauto', 'zmin', 'zmax', 'zmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

slopeByTissueAndtRNAmHeatMap <- 
slopeByTissueAndtRNAm %>%
Heatmap(
    na_col = "black",
    heatmap_width = unit(5.5, "inches"),
    heatmap_height = unit(7.5, "inches"),
    column_title_gp = gpar(fontsize = 16, fontface = "bold"),
    column_title = "Change in DNAm with Age\nby tissue (slope)",
    name = "slope\n",
    column_names_rot = 45,
    column_names_gp = gpar(fontsize = 10),
    row_names_gp = gpar(
        fontsize = 10,
        col = if_else(rownames(.) %in% tRNAgeBB, "red", "black")
    ),
    row_title = "tRNA gene"
)

png(
    filename = "graphics/slopeByTissueAndtRNAmHeatMap_bl.png",
    width = 7, height = 8, units = "in", res = 192
)
slopeByTissueAndtRNAmHeatMap
dev.off()
jpeg 
   2 
slopeByTissueAndtRNAmHeatMap

slopeByTissueAndtRNAmAAsplitHeatMap <- 
slopeByTissueAndtRNAm %>%
Heatmap(
    na_col = "black",
    heatmap_width = unit(5.5, "inches"),
    heatmap_height = unit(7.5, "inches"),
    column_title_gp = gpar(fontsize = 16, fontface = "bold"),
    column_title = "Change in DNAm with Age\nby tissue (slope)",
    name = "slope\n",
    column_names_rot = 45,
    column_names_gp = gpar(fontsize = 10),
    row_split = rownames(.) %>%
        gsub(pattern = "tRNA-(\\w+)-\\w+-\\w+-\\d+", replacement = "\\1"),
    row_names_gp = gpar(
        fontsize = 10,
        col = if_else(rownames(.) %in% tRNAgeBB, "red", "black")
    ),
    row_title = "tRNA gene"
)

png(
    filename = "graphics/slopeByTissueAndtRNAmAAsplitHeatMap_bl.png",
    width = 7, height = 8, units = "in", res = 192
)
slopeByTissueAndtRNAmAAsplitHeatMap
dev.off()
jpeg 
   2 
slopeByTissueAndtRNAmAAsplitHeatMap

5 Session Info

sessionInfo()
R version 3.6.2 (2019-12-12)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.3 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so

locale:
 [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C               LC_TIME=en_GB.UTF-8       
 [4] LC_COLLATE=en_GB.UTF-8     LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
 [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                  LC_ADDRESS=C              
[10] LC_TELEPHONE=C             LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] ComplexHeatmap_2.0.0 patchwork_0.0.1      heatmaply_0.16.0     viridis_0.5.1       
 [5] viridisLite_0.3.0    plotly_4.9.1         psych_1.8.12         broom_0.5.2         
 [9] lubridate_1.7.4      forcats_0.4.0        stringr_1.4.0        dplyr_0.8.3         
[13] purrr_0.3.3          readr_1.3.1          tidyr_1.0.0          tibble_2.1.3        
[17] ggplot2_3.2.1        tidyverse_1.2.1      testthat_2.3.0       devtools_2.2.1      
[21] usethis_1.5.1        reprex_0.3.0        

loaded via a namespace (and not attached):
  [1] readxl_1.3.1        backports_1.1.5     circlize_0.4.8      plyr_1.8.4         
  [5] lazyeval_0.2.2      crosstalk_1.0.0     digest_0.6.22       foreach_1.4.7      
  [9] htmltools_0.4.0     gdata_2.18.0        magrittr_1.5        memoise_1.1.0      
 [13] cluster_2.1.0       gclus_1.3.2         openxlsx_4.1.3      remotes_2.1.0      
 [17] modelr_0.1.5        prettyunits_1.0.2   colorspace_1.4-1    rvest_0.3.5        
 [21] haven_2.2.0         xfun_0.10           callr_3.3.2         crayon_1.3.4       
 [25] jsonlite_1.6        zeallot_0.1.0       iterators_1.0.12    glue_1.3.1         
 [29] registry_0.5-1      gtable_0.3.0        webshot_0.5.1       GetoptLong_0.1.7   
 [33] car_3.0-4           pkgbuild_1.0.6      shape_1.4.4         abind_1.4-5        
 [37] scales_1.0.0        Rcpp_1.0.3          xtable_1.8-4        clue_0.3-57        
 [41] foreign_0.8-72      htmlwidgets_1.5.1   httr_1.4.1          gplots_3.0.1.1     
 [45] RColorBrewer_1.1-2  ellipsis_0.3.0      pkgconfig_2.0.3     tidyselect_0.2.5   
 [49] labeling_0.3        rlang_0.4.1         reshape2_1.4.3      later_1.0.0        
 [53] munsell_0.5.0       cellranger_1.1.0    tools_3.6.2         cli_1.1.0          
 [57] generics_0.0.2      moments_0.14        evaluate_0.14       fastmap_1.0.1      
 [61] yaml_2.2.0          processx_3.4.1      knitr_1.25          fs_1.3.1           
 [65] zip_2.0.4           caTools_1.17.1.2    dendextend_1.12.0   packrat_0.5.0      
 [69] nlme_3.1-143        mime_0.7            xml2_1.2.2          compiler_3.6.2     
 [73] rstudioapi_0.10     curl_4.2            png_0.1-7           stringi_1.4.3      
 [77] ps_1.3.0            desc_1.2.0          lattice_0.20-38     vctrs_0.2.0        
 [81] pillar_1.4.2        lifecycle_0.1.0     GlobalOptions_0.1.1 data.table_1.12.6  
 [85] bitops_1.0-6        seriation_1.2-8     httpuv_1.5.2        R6_2.4.0           
 [89] onewaytests_2.4     promises_1.1.0      TSP_1.1-7           KernSmooth_2.23-16 
 [93] gridExtra_2.3       rio_0.5.16          sessioninfo_1.1.1   codetools_0.2-16   
 [97] MASS_7.3-51.5       gtools_3.8.1        assertthat_0.2.1    pkgload_1.0.2      
[101] rprojroot_1.3-2     rjson_0.2.20        withr_2.1.2         nortest_1.0-4      
[105] mnormt_1.5-5        parallel_3.6.2      hms_0.5.2           rmarkdown_1.16     
[109] carData_3.0-2       Cairo_1.5-10        shiny_1.4.0         base64enc_0.1-3    

6 References

---
title: "05 - GenomicDataCommons Normal Vs. Cancer tissue tRNA gene methylation - Age Modeling"
author: "Richard J. Acton"
date: "`r Sys.Date()`"
output:
  html_document:
    df_print: paged
    fig_caption: yes
    number_sections: yes
    toc: yes
    toc_float: yes
  html_notebook:
    fig_caption: yes
    number_sections: yes
    toc: yes
    toc_float: yes
editor_options: 
  chunk_output_type: inline
bibliography: "`r normalizePath('../library.bib')`"
---

# Set-up

```{r}
if(!dir.exists("out")) {
	dir.create("out", showWarnings = FALSE, recursive = TRUE)
}
```

## libs

```{r}
suppressPackageStartupMessages({
	library(tidyverse)
	library(lubridate)
	library(broom)
	library(psych)
	library(heatmaply)
	library(patchwork)
	library(ComplexHeatmap)
})
```

```{r}
nest <- nest_legacy
unnest <- unnest_legacy
```

## Data Read-In

### tRNA array probes

```{r}
tRNAprobes <- 
read_delim(	
	delim = "\t",
	file = "../tRNA-GtRNAdb/450k_coresponding_hg19tRNAs.bed",
	col_types = "ciicciiciciicccc",
	col_names = c("pChr","pStart","pEnd","probeID",
		as.character(
			read_delim(
				delim = "\t",
				file = "../tRNA-GtRNAdb/std_tRNA_header.txt",
				col_names = FALSE,
				col_types = paste0(rep("c", 12), collapse = "")
			)
		)
	)
)

```

### tRNAge gene set

```{r}
tRNAge <- read_delim(
	delim = "\t",
	 col_names = "tRNAname",
	 col_types = "c",
	 file = "../tRNA-GtRNAdb/GenWideSigWintRNAs.txt"
) %>%
	pull()

tRNAgeBB <- read_delim(
	delim = "\t",
	 col_names = "tRNAname",
	 col_types = "c",
	 file = "../tRNA-GtRNAdb/BB_GWS_tRNA.txt"
) %>%
	pull()
```

```{r}
swsBB23 <- read_tsv(
	"../tRNA-GtRNAdb/swsBB23.tsv",
	col_names = "tRNAname", col_types = "c"
) %>% pull()

gwsBB6 <- read_tsv(
	"../tRNA-GtRNAdb/BB_GWS_tRNA.txt",
	col_names = "tRNAname", col_types = "c"
) %>% pull()

bltRNAs <- read_tsv(
	"../tRNA-GtRNAdb/tRNAs-in-hg19-blacklist-v2.txt",
	col_names = "tRNAname", col_types = "c"
) %>% pull()

swsBB23bl <- swsBB23[!swsBB23 %in% bltRNAs]
gwsBB6bl <- gwsBB6[!gwsBB6 %in% bltRNAs]
```

### Methylation Data

```{r}
dataTest <- readRDS(
	file = "data/tRNAprobesNormCancerArrayPairs.Rds"
)
```

```{r}
tRNAmethCancerNormal <- dataTest %>% 
	unnest() %>% 
	as_tibble() %>%
	filter(sample_type %in% c("Solid Tissue Normal", "Primary Tumor")) %>%
	mutate(tRNAge = tRNAname %in% swsBB23bl) %>%
	filter(!is.na(Beta_value))

```

samples used

```{r}
tRNAmethCancerNormal %>% 
	distinct(file_id, file_name, sample_id, case_id, sample_type, primary_site, age) %>% 
	arrange(sample_type, primary_site, age) %>%
	write_tsv("out/TCGA_samples_used.tsv")
```

```{r}
tRNAmethCancerNormal %>% distinct(case_id) %>% nrow()
```

```{r}
tRNAsCovered <- 
tRNAmethCancerNormal %>% distinct(tRNAname)
tRNAsCovered
```

```{r}
tRNAsCovered %>%
	filter(tRNAname %in% gwsBB6bl)

tRNAsCovered %>%
	filter(tRNAname %in% swsBB23bl)
```

# variance

```{r}
Levene <- 
onewaytests::homog.test(data = tRNAmethCancerNormal, Beta_value ~ sample_type, method = "Levene")

#Levene %>% str()

Brown_forsythe <- 
onewaytests::bf.test(data = tRNAmethCancerNormal, Beta_value ~ sample_type)

#Brown_forsythe %>% str()
```

# By tRNA age modeling

## Normal

```{r}
normalAgeModelsBytRNA <- tRNAmethCancerNormal %>%
	dplyr::filter(sample_type == "Solid Tissue Normal") %>%
	group_by(tRNAname) %>%
	#group_by(tRNAname,primary_site) %>%
	nest() %>%
	mutate(model = purrr::map(data, ~ lm(age ~ Beta_value, data = .)))
```

```{r}
#bonfer <- 0.05 / tRNAmethCancerNormal %>% dplyr::select(probeID) %>% distinct() %>% nrow()
bonfer <- 0.05 / normalAgeModelsBytRNA %>% nrow()
bonfer
```

```{r}
normalAgeModelsBytRNAG <- normalAgeModelsBytRNA %>%
	unnest(model %>% purrr::map(glance)) %>% 
	arrange(p.value)
normalAgeModelsBytRNAG %>%
	dplyr::select(-data,-model)
```

```{r}
normalAgeModelsBytRNAGsig <- normalAgeModelsBytRNAG %>%
	dplyr::select(-data, -model) %>%
	dplyr::filter(p.value < bonfer) %>% 
	arrange(p.value)

normalAgeModelsBytRNAGsig
```

```{r}
normalAgeModelsBytRNA %>%
    unnest(model %>% purrr::map(tidy)) %>% 
	filter(term == "Beta_value", p.value < bonfer)
```

```{r}
normalAgeModelsBytRNAGsig %>% distinct(tRNAname)
```

```{r}
normalAgeModelsBytRNAGsig %>% 
	dplyr::filter(tRNAname %in% gwsBB6bl)

normalAgeModelsBytRNAGsig %>% 
	dplyr::filter(tRNAname %in% swsBB23bl)
```

```{r}
# plots <- 
# tRNAmethCancerNormal %>%
# 	group_by(aa) %>%
# 	do(plot=
# 		ggplot(aes())
# 	)
```

### Clustering

```{r}
normalAgeModelsBytRNAG %>% select(-data, -model)
```

## Cancer

```{r}
cancerAgeModelsBytRNA <- tRNAmethCancerNormal %>%
	dplyr::filter(sample_type == "Primary Tumor") %>%
	group_by(tRNAname) %>%
	#group_by(tRNAname,primary_site) %>%
	nest() %>%
	mutate(model = purrr::map(data, ~ lm(age ~ Beta_value, data = .)))
```

```{r}
cancerAgeModelsBytRNAG <- cancerAgeModelsBytRNA %>%
	unnest(model %>% purrr::map(glance)) %>% 
	arrange(p.value)
cancerAgeModelsBytRNAG %>%
	dplyr::select(-data, -model)
```

```{r}
cancerAgeModelsBytRNAGsig <- cancerAgeModelsBytRNAG %>%
	dplyr::select(-data, -model) %>%
	dplyr::filter(p.value < bonfer) %>% 
	arrange(p.value)
cancerAgeModelsBytRNAGsig
```

```{r}
cancerAgeModelsBytRNAGsig %>% distinct(tRNAname)
```


```{r}
cancerAgeModelsBytRNAGsig %>% 
	dplyr::filter(tRNAname %in% swsBB23bl)
```

# By tRNA and Tissue Age modeling

```{r,fig.width=6,fig.height=8}
tRNAmethCancerNormal %>%
	dplyr::filter(sample_type == "Solid Tissue Normal") %>%
	group_by(tRNAname, primary_site) %>%
	summarise(n = n()) %>%
	spread(primary_site, n) %>%
	column_to_rownames("tRNAname") %>%
	data.matrix() %>% 
	#heatmaply()
	Heatmap(
		row_names_gp = gpar(fontsize = 8),
		na_col = "black",
		heatmap_width = unit(5.5, "inches"),
		heatmap_height = unit(7.5, "inches")#,
	)
```

```{r}
ggplotly(dynamicTicks = TRUE,
tRNAmethCancerNormal %>%
	dplyr::filter(sample_type == "Solid Tissue Normal") %>%
	group_by(tRNAname, primary_site) %>%
	summarise(n = n()) %>%
	ggplot(aes(n)) + 
		geom_density(aes(colour = primary_site))
)
```


```{r}
normalAgeModelsBytRNABySite <- tRNAmethCancerNormal %>%
	dplyr::filter(sample_type == "Solid Tissue Normal") %>%
	group_by(tRNAname, primary_site) %>%
	nest() %>%
	mutate(model = purrr::map(data, ~ lm(age ~ Beta_value, data = .)))
```

```{r}
normalAgeModelsBytRNABySiteG <- normalAgeModelsBytRNABySite %>%
	unnest(model %>% purrr::map(glance)) %>% 
	arrange(p.value)
#normalAgeModelsBytRNABySiteG
```

```{r}
bonfertRNATissue <- 0.05 / normalAgeModelsBytRNABySiteG %>% nrow()
bonfertRNATissue
```


```{r}
normalAgeModelsBytRNABySiteGsig <- normalAgeModelsBytRNABySiteG %>%
	dplyr::filter(p.value < bonfertRNATissue) %>%
	dplyr::select(-data, -model)
normalAgeModelsBytRNABySiteGsig
```

```{r}
normalAgeModelsBytRNABySiteGsig %>% 
	dplyr::filter(tRNAname %in% swsBB23bl)
```

```{r}
normalAgeModelsBytRNABySiteG %>% 
	dplyr::select(-data, -model) %>%
	dplyr::filter(tRNAname %in% swsBB23bl) %>%
	dplyr::filter(p.value < bonfer) ###!!! not correcting for all tests - use bonfertRNATissue
```

### Clustering

```{r,fig.height=8,fig.width=7}
pvalueByTissueAndtRNA <- 
normalAgeModelsBytRNABySiteG %>%
	ungroup() %>%
	dplyr::select(-data, -model) %>%
	select(tRNAname, p.value, primary_site) %>%
	spread(primary_site, p.value)
	#tRNAname,primary_site,p.value
	#spread(tRNAname,p.value)

lowCoverageTissues <- 
normalAgeModelsBytRNABySiteG %>%
	ungroup() %>%
	dplyr::select(-data, -model) %>%
	select(tRNAname, p.value, primary_site) %>%
	group_by(primary_site) %>%
	summarise(Nna = length(which(is.na(p.value) | is.nan(p.value))), n = n(), p = Nna / n) %>%
	filter(p > 0.8) %>%
	mutate(primary_site = as.character(primary_site)) %>%
	pull(primary_site)

pvalueByTissueAndtRNAm <- 
pvalueByTissueAndtRNA %>%
	select(-tRNAname) %>%
	select(-lowCoverageTissues) %>%
	data.matrix()
	
rownames(pvalueByTissueAndtRNAm) <- pvalueByTissueAndtRNA$tRNAname

pvalueByTissueAndtRNAm %>% dim()
pvalueByTissueAndtRNAm %>% heatmaply::heatmaply(
	main = "Change in DNAm with Age by tissue (p-value)"
)

pvalueByTissueAndtRNAm %>%
Heatmap(
	na_col = "black",
	heatmap_width = unit(5.5, "inches"),
	heatmap_height = unit(7.5, "inches"),
	column_title_gp = gpar(fontsize = 16, fontface = "bold"),
	column_title = "Change in DNAm with Age\nby tissue (p-value)",
	name = "p-value\n",
	column_names_rot = 45,
	column_names_gp = gpar(fontsize = 10),
	row_names_gp = gpar(
		fontsize = 10,
		col = if_else(rownames(.) %in% tRNAgeBB, "red", "black")
	),
	row_title = "tRNA gene"
)

#pvalueByTissueAndtRNAm %>%

tmp <- apply(data.matrix(pvalueByTissueAndtRNAm < 0.05), 2, as.character)
rownames(tmp) <- rownames(pvalueByTissueAndtRNAm)
tmp %>%
Heatmap(
	col = structure(c("red", "blue"), names = c("TRUE", "FALSE")),
	na_col = "black",
	heatmap_width = unit(5.5, "inches"),
	heatmap_height = unit(7.5, "inches"),
	column_title_gp = gpar(fontsize = 16, fontface = "bold"),
	column_title = "Change in DNAm with Age\nby tissue (p-value)",
	name = "p-value\n",
	column_names_rot = 45,
	column_names_gp = gpar(fontsize = 10),
	row_split = rownames(.) %>%
		gsub(pattern = "tRNA-(\\w+)-\\w+-\\w+-\\d+", replacement = "\\1"),
	row_names_gp = gpar(
		fontsize = 10,
		col = if_else(rownames(.) %in% tRNAgeBB, "red", "black")
	),
	row_title = "tRNA gene"
)
```

```{r,fig.width=7,fig.height=8}
normalAgeModelsBytRNABySiteGl <- 
normalAgeModelsBytRNABySiteG %>% 
	select(-data) %>% 
	unnest(model %>% map(broom::tidy)) %>% 
	select(-std.error, -statistic1, -p.value1) %>% 
	spread(term,estimate)

slopeByTissueAndtRNA <- 
normalAgeModelsBytRNABySiteGl %>%
	ungroup() %>%
	#dplyr::select(-data,-model) %>%
	select(tRNAname, Beta_value, primary_site) %>%
	#mutate(Beta_value = log(Beta_value)) %>%
	spread(primary_site, Beta_value)
	#tRNAname,primary_site,p.value
	#spread(tRNAname,p.value)

lowCoverageTissues <- 
normalAgeModelsBytRNABySiteGl %>%
	ungroup() %>%
	#dplyr::select(-data,-model) %>%
	select(tRNAname, Beta_value, primary_site) %>%
	group_by(primary_site) %>%
	summarise(Nna = length(which(is.na(Beta_value) | is.nan(Beta_value))), n = n(), p = Nna / n) %>%
	filter(p > 0.5) %>%
	mutate(primary_site = as.character(primary_site)) %>%
	pull(primary_site)

slopeByTissueAndtRNAm <- 
slopeByTissueAndtRNA %>%
	select(-tRNAname) %>%
	select(-lowCoverageTissues) %>%
	data.matrix()
	
rownames(slopeByTissueAndtRNAm) <- slopeByTissueAndtRNA$tRNAname

slopeByTissueAndtRNAm %>% dim()
slopeByTissueAndtRNAm %>% heatmaply::heatmaply(
	main = "Change in DNAm with Age by tissue (slope)"
)

slopeByTissueAndtRNAmHeatMap <- 
slopeByTissueAndtRNAm %>%
Heatmap(
	na_col = "black",
	heatmap_width = unit(5.5, "inches"),
	heatmap_height = unit(7.5, "inches"),
	column_title_gp = gpar(fontsize = 16, fontface = "bold"),
	column_title = "Change in DNAm with Age\nby tissue (slope)",
	name = "slope\n",
	column_names_rot = 45,
	column_names_gp = gpar(fontsize = 10),
	row_names_gp = gpar(
		fontsize = 10,
		col = if_else(rownames(.) %in% tRNAgeBB, "red", "black")
	),
	row_title = "tRNA gene"
)

png(
	filename = "graphics/slopeByTissueAndtRNAmHeatMap_bl.png",
	width = 7, height = 8, units = "in", res = 192
)
slopeByTissueAndtRNAmHeatMap
dev.off()

slopeByTissueAndtRNAmHeatMap

slopeByTissueAndtRNAmAAsplitHeatMap <- 
slopeByTissueAndtRNAm %>%
Heatmap(
	na_col = "black",
	heatmap_width = unit(5.5, "inches"),
	heatmap_height = unit(7.5, "inches"),
	column_title_gp = gpar(fontsize = 16, fontface = "bold"),
	column_title = "Change in DNAm with Age\nby tissue (slope)",
	name = "slope\n",
	column_names_rot = 45,
	column_names_gp = gpar(fontsize = 10),
	row_split = rownames(.) %>%
		gsub(pattern = "tRNA-(\\w+)-\\w+-\\w+-\\d+", replacement = "\\1"),
	row_names_gp = gpar(
		fontsize = 10,
		col = if_else(rownames(.) %in% tRNAgeBB, "red", "black")
	),
	row_title = "tRNA gene"
)

png(
	filename = "graphics/slopeByTissueAndtRNAmAAsplitHeatMap_bl.png",
	width = 7, height = 8, units = "in", res = 192
)
slopeByTissueAndtRNAmAAsplitHeatMap
dev.off()

slopeByTissueAndtRNAmAAsplitHeatMap
```

# Session Info

```{r}
sessionInfo()
```

# References
